# !pip install geopandas
# !pip install h3
# !pip install folium
# !pip install osmnx
# !pip install geojson
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import json
import h3
import folium
import osmnx as ox
from shapely import wkt
from folium.plugins import HeatMap
from shapely.geometry import Polygon
from folium.plugins import MarkerCluster, HeatMap
import pandas as pd
from shapely.geometry import Polygon
from geojson import Feature, Point, FeatureCollection, Polygon
import plotly.express as px
from tqdm import tqdm
tqdm.pandas()
# Load the two input tables from the working directory: the interests table
# and the locations table (the latter provides lat / lon / id columns below).
interests_df, locs_df = (
    pd.read_csv(csv_name)
    for csv_name in ("stupino_interests.csv", "stupino_locs.csv")
)
Построение диаграммы: сколько пользовательских отметок приходится на каждый гексагон карты
H3_res = 9  # H3 resolution in [0 .. 15]: the larger the value, the smaller the hexagon area


def geo_to_h3(row, resolution=None):
    """Return the H3 cell index containing the point described by ``row``.

    Args:
        row: Object exposing ``lat`` and ``lon`` attributes (for example a
            DataFrame row passed by ``apply(..., axis=1)``).
        resolution: H3 resolution to index at. When omitted, the module-level
            ``H3_res`` is used, so existing single-argument calls are unchanged.

    Returns:
        The value returned by ``h3.geo_to_h3`` for that point and resolution.
    """
    if resolution is None:
        resolution = H3_res
    return h3.geo_to_h3(lat=row.lat, lng=row.lon, resolution=resolution)
# Index every location row into its H3 cell. Iterating the two coordinate
# columns directly avoids building a Series object per row, which is what
# makes DataFrame.apply(axis=1) slow on a table with millions of rows; wrapping
# the iterator in tqdm keeps the progress bar.
locs_df['h3_cell'] = [
    h3.geo_to_h3(lat=lat, lng=lon, resolution=H3_res)
    for lat, lon in tqdm(zip(locs_df['lat'], locs_df['lon']), total=len(locs_df))
]
100%|██████████| 10880142/10880142 [03:12<00:00, 56543.06it/s]
# For every H3 cell, gather the ids of all location rows that fall inside it.
# Result columns: 'h3_cell' and 'ids' (a Python list per cell).
locs_df_g = (
    locs_df.groupby('h3_cell')['id']
    .agg(list)
    .rename("ids")
    .reset_index()
)
# Number of location rows collected for each hexagon (length of its id list).
locs_df_g['count'] = locs_df_g['ids'].progress_apply(len)
100%|██████████| 1573/1573 [00:00<00:00, 696467.88it/s]
from shapely.geometry import Polygon
def add_geometry(row):
    """Build the boundary polygon of the H3 cell stored in ``row['h3_cell']``.

    Args:
        row: Mapping-like row with an ``'h3_cell'`` entry holding an H3 index.

    Returns:
        A ``shapely.geometry.Polygon`` built from the cell's boundary vertices.
    """
    # The file imports a ``Polygon`` from both shapely and geojson, so the
    # global name depends on which import ran last. Bind shapely's class
    # locally so the result type does not depend on import / cell order.
    from shapely.geometry import Polygon as ShapelyPolygon

    # geo_json=True asks h3 for the GeoJSON-style boundary
    # (per the h3-py docs: (lng, lat) vertex order, closed ring).
    points = h3.h3_to_geo_boundary(row['h3_cell'], geo_json=True)
    return ShapelyPolygon(points)
# Attach each cell's boundary polygon as a new 'geometry' column.
locs_df_g['geometry'] = locs_df_g.progress_apply(add_geometry, axis=1)
100%|██████████| 1573/1573 [00:00<00:00, 9454.89it/s]
def hexagons_dataframe_to_geojson(df_hex, hex_id_field, geometry_field, value_field, file_output=None):
    """Convert a hexagon DataFrame into a GeoJSON ``FeatureCollection``.

    Every row becomes one ``Feature`` whose ``id`` is the row's
    ``hex_id_field`` value, whose geometry is the row's ``geometry_field``
    value, and whose ``properties`` hold the ``value_field`` value under the
    key ``"value"``.

    Args:
        df_hex: DataFrame with one row per hexagon.
        hex_id_field: Name of the column holding the hexagon id.
        geometry_field: Name of the column holding the hexagon geometry.
        value_field: Name of the column holding the value to expose.
        file_output: Optional path. When given, the collection is additionally
            written to that file as JSON.

    Returns:
        The ``FeatureCollection``. It is returned in both modes; previously the
        function returned ``None`` whenever ``file_output`` was supplied, which
        forced callers to re-read the file to get the object back.
    """
    list_features = [
        Feature(
            geometry=row[geometry_field],
            id=row[hex_id_field],
            properties={"value": row[value_field]},
        )
        for _, row in df_hex.iterrows()
    ]
    feat_collection = FeatureCollection(list_features)

    if file_output is not None:
        # Explicit encoding so the output does not depend on the platform default.
        with open(file_output, "w", encoding="utf-8") as f:
            json.dump(feat_collection, f)

    return feat_collection
# GeoJSON features for all hexagons, carrying the per-cell count as "value".
geojson_obj = hexagons_dataframe_to_geojson(
    df_hex=locs_df_g,
    hex_id_field='h3_cell',
    geometry_field='geometry',
    value_field='count',
)
import plotly.express as px
# Choropleth of the per-hexagon counts, centred on the mean coordinate of all
# location rows. The colour scale is capped at the mean count per cell.
map_center = {"lat": locs_df.lat.mean(), "lon": locs_df.lon.mean()}
count_range = (0, locs_df_g['count'].mean())
fig = px.choropleth_mapbox(
    locs_df_g,
    geojson=geojson_obj,
    locations='h3_cell',
    color='count',
    color_continuous_scale="Viridis",
    range_color=count_range,
    mapbox_style='carto-positron',
    zoom=12,
    center=map_center,
    opacity=0.1,
    labels={'count': 'count of data'},
)
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show("notebook")
# Column totals of every "age_*" column, rendered as a table with in-cell bars.
ages_df = interests_df.loc[:, interests_df.columns.str.startswith("age_")]
age_totals = ages_df.apply(sum)
age_totals.to_frame().style.bar()
| 0 | |
|---|---|
| age_18_24 | 50297 |
| age_25_34 | 53941 |
| age_35_44 | 41632 |
| age_45_54 | 22903 |
| age_17 | 15314 |
| age_55 | 27033 |
# Column totals of every "gender_*" column, rendered as a table with in-cell bars.
gender_df = interests_df.loc[:, interests_df.columns.str.startswith("gender_")]
gender_totals = gender_df.apply(sum)
gender_totals.to_frame().style.bar()
| 0 | |
|---|---|
| gender_female | 73533 |
| gender_male | 73437 |
# Column totals of every "employment_*" column, rendered as a table with in-cell bars.
employment_df = interests_df.loc[:, interests_df.columns.str.startswith("employment_")]
employment_totals = employment_df.apply(sum)
employment_totals.to_frame().style.bar()
| 0 | |
|---|---|
| employment_working | 78881 |
| employment_not_working | 26591 |
# Column totals of every "availability_of_education_*" column, shown with in-cell bars.
education_mask = interests_df.columns.str.startswith("availability_of_education_")
interests_df.loc[:, education_mask].apply(sum).to_frame().style.bar()
| 0 | |
|---|---|
| availability_of_education_has_a_higher_education | 55189 |
| availability_of_education_no_higher_education | 64654 |
# Column totals of every "children_*" column, shown with in-cell bars.
children_mask = interests_df.columns.str.startswith("children_")
interests_df.loc[:, children_mask].apply(sum).to_frame().style.bar()
| 0 | |
|---|---|
| children_under_16_there_are_children_in_the_family | 71838 |
| children_under_16_no_children_in_the_family | 78563 |
# Column totals of every "marital_*" column, shown with in-cell bars.
marital_mask = interests_df.columns.str.startswith("marital_")
interests_df.loc[:, marital_mask].apply(sum).to_frame().style.bar()
| 0 | |
|---|---|
| marital_status_married | 22707 |
| marital_status_not_married | 52332 |
# Column totals of every column whose name contains "individual_income_",
# shown with in-cell bars.
individual_income_df = interests_df.filter(like="individual_income_")
individual_income_df.apply(sum).to_frame().style.bar()
| 0 | |
|---|---|
| individual_income_a_below_average_income | 5453 |
| individual_income_b_average_income | 11620 |
| individual_income_c_above_average_income | 5440 |
| individual_income_d_high_income | 2085 |
| individual_income_e_premium | 1262 |
# Column totals of every column whose name contains "household_income_",
# shown with in-cell bars.
household_income_df = interests_df.filter(like="household_income_")
household_income_df.apply(sum).to_frame().style.bar()
| 0 | |
|---|---|
| household_income_a_below_average | 3917 |
| household_income_b_average | 50697 |
| household_income_c_above_average | 86867 |
# Column totals of every "interests_*" column, largest first, shown with in-cell bars.
interests_mask = interests_df.columns.str.startswith("interests_")
interest_totals = interests_df.loc[:, interests_mask].apply(sum)
interest_totals.sort_values(ascending=False).to_frame().style.bar()
| 0 | |
|---|---|
| interests_banks_banking_services | 61024 |
| interests_new_buildings | 58974 |
| interests_car_owners | 55978 |
| interests_b2b_documentary_and_financial_and_legal_support | 52628 |
| interests_auto_premium_class | 51677 |
| interests_medium_and_large_business | 51580 |
| interests_renting_residential_property | 50192 |
| interests_b2b_it_for_business | 50041 |
| interests_auto_middle_class | 50041 |
| interests_auto_economy_class | 49551 |
| interests_b2b_raw_materials | 48303 |
| interests_resale_property | 48132 |
| interests_baby_products | 47634 |
| interests_interest_in_buying_a_new_car | 47208 |
| interests_contributions_and_deposits | 46209 |
| interests_mortgage | 45086 |
| interests_loans_for_business | 44978 |
| interests_credit_cards | 44713 |
| interests_interest_in_insurance | 44569 |
| interests_consumer_loans | 42902 |
| interests_interest_in_buying_a_new_premium_car | 42245 |
| interests_auto_insurance | 41112 |
| interests_moto | 40302 |
| interests_interest_in_buying_a_new_economy_class_car | 39955 |
| interests_interest_in_buying_a_mobile_phone | 39740 |
| interests_freight_and_commercial_vehicles | 39159 |
| interests_auto_parts_and_service | 38710 |
| interests_interest_in_buying_a_new_middle_class_car | 38656 |
| interests_all_about_children | 37114 |
| interests_overseas_real_estate | 36566 |
| interests_wedding | 35279 |
| interests_b2b_trade_equipment_and_goods_wholesale | 34546 |
| interests_used_cars | 34170 |
| interests_b2b_medical_equipment | 34113 |
| interests_baby_food | 32770 |
| interests_b2b_agriculture | 32546 |
| interests_mobile_devices | 30739 |
| interests_television_and_video_equipment | 29597 |
| interests_special_equipment | 29147 |
| interests_car_loans | 26554 |
| interests_cell_phones_and_headset | 25890 |
| interests_auto_suvs | 25263 |
| interests_tires_and_wheels | 24931 |
| interests_parents_of_toddlers | 23065 |
| interests_b2b_office | 20957 |
| interests_telecom_operators | 20718 |
| interests_quotes_stock_markets | 20256 |
| interests_laptops_and_netbooks | 19950 |
| interests_microloans | 19630 |
| interests_using_online_banking | 19531 |
| interests_pregnancy_and_childbirth | 19376 |
| interests_learning_languages | 18118 |
| interests_parents_of_middle_and_high_school_students | 17960 |
| interests_use_of_electronic_money | 17907 |
| interests_of_parents_of_primary_school_students | 16924 |
| interests_parents_of_newborns | 16849 |
| interests_b2b_equipment_machines_energy_supply | 16636 |
| interests_mobile_communications_and_internet_access | 16358 |
| interests_photo_and_video_cameras | 16136 |
| interests_houses_cottages_and_land_plots | 15382 |
| interests_education | 14821 |
| interests_small_business | 14804 |
| interests_audio_engineering | 14345 |
| interests_business_education | 13252 |
| interests_parents_of_preschoolers | 11619 |
| interests_internet_access | 11511 |
| interests_childrens_health | 11186 |
| interests_finance_and_accounting | 11114 |
| interests_commercial_real_estate | 9786 |
| interests_basic | 8983 |
| interests_legal_support | 8964 |
| interests_preschool | 8252 |
| interests_b2b_advertising_and_marketing | 6442 |
| interests_higher | 5035 |
| interests_auto_electronics_and_gps | 5010 |
| interests_tablets_and_ereaders | 3532 |
| interests_average | 2693 |
| interests_tvs | 2620 |
| interests_human_resources | 877 |
| interests_specialized_secondary | 875 |
| interests_active_mobile_internet_users | 859 |
h3_cell = "89118172457ffff"
h3_cell_from_center = "8911817240fffff"

# Ids collected for the chosen hexagon (IndexError if the cell is not present).
ids = locs_df_g[locs_df_g["h3_cell"] == h3_cell_from_center]["ids"].to_list()[0]

# Keep the id column plus the interest columns. The id column is matched by its
# exact name: a substring test ("id" in name) also matches unrelated columns
# such as "individual_income_*", which then leak into the interest totals.
df = interests_df[[user for user in interests_df.columns if user.startswith("interests_") or user == "id"]]
df = df[df.id.isin(ids)]

# Interest totals for the rows belonging to this hexagon, largest first.
df.drop("id", axis=1).apply(sum).sort_values(ascending=False).to_frame().style.bar()
| 0 | |
|---|---|
| interests_banks_banking_services | 998 |
| interests_new_buildings | 975 |
| interests_car_owners | 911 |
| interests_baby_products | 839 |
| interests_resale_property | 812 |
| interests_all_about_children | 795 |
| interests_parents_of_toddlers | 721 |
| interests_cell_phones_and_headset | 719 |
| interests_parents_of_middle_and_high_school_students | 688 |
| interests_b2b_documentary_and_financial_and_legal_support | 659 |
| interests_of_parents_of_primary_school_students | 655 |
| interests_mortgage | 649 |
| interests_b2b_office | 612 |
| interests_parents_of_newborns | 594 |
| interests_medium_and_large_business | 594 |
| interests_pregnancy_and_childbirth | 572 |
| interests_learning_languages | 555 |
| interests_renting_residential_property | 530 |
| interests_special_equipment | 519 |
| interests_auto_premium_class | 510 |
| interests_b2b_it_for_business | 491 |
| interests_mobile_communications_and_internet_access | 491 |
| interests_auto_parts_and_service | 480 |
| interests_baby_food | 476 |
| interests_education | 468 |
| interests_microloans | 458 |
| interests_auto_middle_class | 454 |
| interests_moto | 432 |
| interests_auto_economy_class | 423 |
| interests_television_and_video_equipment | 422 |
| interests_interest_in_insurance | 419 |
| individual_income_b_average_income | 417 |
| interests_houses_cottages_and_land_plots | 397 |
| interests_small_business | 385 |
| interests_interest_in_buying_a_mobile_phone | 370 |
| interests_mobile_devices | 369 |
| interests_b2b_raw_materials | 367 |
| interests_telecom_operators | 322 |
| interests_internet_access | 322 |
| interests_interest_in_buying_a_new_car | 318 |
| interests_finance_and_accounting | 314 |
| interests_basic | 309 |
| interests_parents_of_preschoolers | 309 |
| interests_auto_suvs | 308 |
| interests_freight_and_commercial_vehicles | 291 |
| interests_loans_for_business | 285 |
| interests_used_cars | 280 |
| interests_contributions_and_deposits | 277 |
| individual_income_c_above_average_income | 260 |
| interests_childrens_health | 253 |
| interests_credit_cards | 244 |
| interests_commercial_real_estate | 241 |
| interests_photo_and_video_cameras | 234 |
| interests_tires_and_wheels | 231 |
| individual_income_a_below_average_income | 221 |
| interests_laptops_and_netbooks | 210 |
| interests_audio_engineering | 193 |
| interests_b2b_trade_equipment_and_goods_wholesale | 186 |
| interests_legal_support | 184 |
| interests_use_of_electronic_money | 179 |
| interests_consumer_loans | 166 |
| interests_higher | 166 |
| interests_using_online_banking | 156 |
| interests_wedding | 151 |
| interests_preschool | 143 |
| interests_auto_insurance | 143 |
| interests_car_loans | 136 |
| interests_auto_electronics_and_gps | 132 |
| interests_quotes_stock_markets | 126 |
| interests_interest_in_buying_a_new_middle_class_car | 125 |
| interests_b2b_equipment_machines_energy_supply | 120 |
| interests_overseas_real_estate | 119 |
| interests_average | 117 |
| interests_interest_in_buying_a_new_premium_car | 110 |
| interests_interest_in_buying_a_new_economy_class_car | 108 |
| interests_b2b_advertising_and_marketing | 106 |
| individual_income_d_high_income | 105 |
| interests_business_education | 100 |
| interests_tablets_and_ereaders | 98 |
| interests_tvs | 78 |
| interests_b2b_medical_equipment | 70 |
| individual_income_e_premium | 54 |
| interests_specialized_secondary | 41 |
| interests_human_resources | 38 |
| interests_b2b_agriculture | 17 |
| interests_active_mobile_internet_users | 13 |
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")

# Totals of every "interests_*" column over the whole interests table.
interest_columns = interests_df.columns[interests_df.columns.str.startswith("interests_")]
all_int_df = interests_df[interest_columns].apply(sum)

# The same totals restricted to rows whose id was collected for the
# hexagon h3_cell_from_center.
ids = locs_df_g.loc[locs_df_g["h3_cell"] == h3_cell_from_center, "ids"].to_list()[0]
df = interests_df.loc[interests_df.id.isin(ids), interest_columns].apply(sum)
df
interests_b2b_advertising_and_marketing 106
interests_b2b_raw_materials 367
interests_b2b_equipment_machines_energy_supply 120
interests_b2b_office 612
interests_b2b_documentary_and_financial_and_legal_support 659
...
interests_parents_of_toddlers 721
interests_parents_of_preschoolers 309
interests_of_parents_of_primary_school_students 655
interests_parents_of_middle_and_high_school_students 688
interests_business_education 100
Length: 81, dtype: int64
# Put the whole-dataset and single-hexagon totals side by side, then min-max
# scale each column independently to [0, 1] so the two profiles are comparable.
ddf = pd.DataFrame({"all": all_int_df, "hex": df})
column_min = ddf.min()
column_span = ddf.max() - column_min
normalized_df = (ddf - column_min) / column_span
normalized_df.plot(kind='bar', alpha=0.7, position=0, width=1.0, rot=90, figsize=(18, 5))
<AxesSubplot: >
# Per-interest gap between the normalised whole-dataset profile and the
# hexagon profile: negative values mean the hexagon's normalised score is higher.
diff = normalized_df["all"].sub(normalized_df["hex"])
diff.plot(kind='bar', alpha=0.7, position=0, width=1.0, rot=90, figsize=(18, 5))
<AxesSubplot: >
# Same gaps as a table, smallest (most negative) first, with in-cell bars.
sorted_diff = diff.sort_values()
sorted_diff.to_frame().style.bar()
| 0 | |
|---|---|
| interests_parents_of_middle_and_high_school_students | -0.401044 |
| interests_of_parents_of_primary_school_students | -0.384761 |
| interests_parents_of_toddlers | -0.349697 |
| interests_parents_of_newborns | -0.324079 |
| interests_cell_phones_and_headset | -0.300712 |
| interests_b2b_office | -0.274074 |
| interests_learning_languages | -0.263393 |
| interests_pregnancy_and_childbirth | -0.259742 |
| interests_education | -0.229867 |
| interests_mobile_communications_and_internet_access | -0.227671 |
| interests_all_about_children | -0.191316 |
| interests_basic | -0.165479 |
| interests_houses_cottages_and_land_plots | -0.148462 |
| interests_small_business | -0.145886 |
| interests_microloans | -0.139785 |
| interests_internet_access | -0.136659 |
| interests_finance_and_accounting | -0.135136 |
| interests_parents_of_preschoolers | -0.121666 |
| interests_higher | -0.085921 |
| interests_commercial_real_estate | -0.083097 |
| interests_average | -0.075101 |
| interests_childrens_health | -0.072010 |
| interests_baby_products | -0.061133 |
| interests_auto_electronics_and_gps | -0.051819 |
| interests_special_equipment | -0.043532 |
| interests_tablets_and_ereaders | -0.041867 |
| interests_legal_support | -0.038891 |
| interests_tvs | -0.036720 |
| interests_specialized_secondary | -0.028160 |
| interests_resale_property | -0.025445 |
| interests_human_resources | -0.025082 |
| interests_new_buildings | -0.010723 |
| interests_preschool | -0.009101 |
| interests_b2b_advertising_and_marketing | -0.001621 |
| interests_active_mobile_internet_users | 0.000000 |
| interests_banks_banking_services | 0.000000 |
| interests_car_owners | 0.004456 |
| interests_telecom_operators | 0.016370 |
| interests_photo_and_video_cameras | 0.029553 |
| interests_audio_engineering | 0.041409 |
| interests_baby_food | 0.060341 |
| interests_television_and_video_equipment | 0.062425 |
| interests_mortgage | 0.089410 |
| interests_auto_suvs | 0.106125 |
| interests_use_of_electronic_money | 0.114826 |
| interests_laptops_and_netbooks | 0.117311 |
| interests_business_education | 0.117659 |
| interests_mobile_devices | 0.135213 |
| interests_b2b_equipment_machines_energy_supply | 0.153599 |
| interests_auto_parts_and_service | 0.155008 |
| interests_using_online_banking | 0.165169 |
| interests_tires_and_wheels | 0.178780 |
| interests_b2b_documentary_and_financial_and_legal_support | 0.204613 |
| interests_quotes_stock_markets | 0.207676 |
| interests_moto | 0.230200 |
| interests_medium_and_large_business | 0.253184 |
| interests_used_cars | 0.282595 |
| interests_interest_in_buying_a_mobile_phone | 0.283803 |
| interests_renting_residential_property | 0.295089 |
| interests_car_loans | 0.302202 |
| interests_interest_in_insurance | 0.314319 |
| interests_b2b_it_for_business | 0.332173 |
| interests_auto_premium_class | 0.340075 |
| interests_freight_and_commercial_vehicles | 0.354349 |
| interests_auto_middle_class | 0.369736 |
| interests_b2b_trade_equipment_and_goods_wholesale | 0.384276 |
| interests_auto_economy_class | 0.393064 |
| interests_b2b_raw_materials | 0.429174 |
| interests_wedding | 0.431992 |
| interests_loans_for_business | 0.457158 |
| interests_interest_in_buying_a_new_car | 0.460720 |
| interests_contributions_and_deposits | 0.485740 |
| interests_overseas_real_estate | 0.485870 |
| interests_credit_cards | 0.494378 |
| interests_b2b_medical_equipment | 0.494845 |
| interests_interest_in_buying_a_new_middle_class_car | 0.514517 |
| interests_b2b_agriculture | 0.522607 |
| interests_auto_insurance | 0.537064 |
| interests_consumer_loans | 0.543465 |
| interests_interest_in_buying_a_new_economy_class_car | 0.553366 |
| interests_interest_in_buying_a_new_premium_car | 0.589398 |
def get_embedding(geohash_h3, cells=None, interests=None, totals=None):
    """Return the interest-profile embedding of one H3 cell.

    The embedding is, per ``interests_*`` column, the min-max normalised
    whole-dataset total minus the min-max normalised total over the rows whose
    ``id`` was collected for the cell.

    Args:
        geohash_h3: H3 index of the cell to describe.
        cells: Table with ``h3_cell`` and ``ids`` columns. Defaults to the
            module-level ``locs_df_g``.
        interests: Table with an ``id`` column and ``interests_*`` columns.
            Defaults to the module-level ``interests_df``.
        totals: Series of whole-dataset totals indexed by interest column
            name. Defaults to the module-level ``all_int_df``.

    Returns:
        A 1-D ``numpy`` array with one entry per interest column. Every entry
        is NaN when the cell's totals are all equal (for instance when none of
        its ids occur in ``interests``), because the min-max range is zero.

    Raises:
        IndexError: If ``geohash_h3`` is not present in ``cells``.
    """
    if cells is None:
        cells = locs_df_g
    if interests is None:
        interests = interests_df
    if totals is None:
        totals = all_int_df

    ids = cells.loc[cells["h3_cell"] == geohash_h3, "ids"].to_list()[0]

    # Select rows and interest columns in one step and sum them vectorised,
    # instead of copying an id+interest frame and filtering its columns again.
    interest_columns = [name for name in interests.columns if name.startswith("interests_")]
    hex_totals = interests.loc[interests["id"].isin(ids), interest_columns].sum()

    ddf = pd.DataFrame({"all": totals, "hex": hex_totals})
    normalized_df = (ddf - ddf.min()) / (ddf.max() - ddf.min())
    diff = normalized_df["all"] - normalized_df["hex"]
    return diff.to_numpy()
# One row per H3 cell together with its interest embedding,
# e.g. get_embedding("89118172457ffff") for a single cell.
hex_df = locs_df_g[["h3_cell"]].rename(columns={"h3_cell": "h3_geohash"})
hex_df["hex_emb"] = hex_df["h3_geohash"].progress_apply(get_embedding)
100%|██████████| 1573/1573 [00:36<00:00, 42.76it/s]
hex_df
| h3_geohash | hex_emb | |
|---|---|---|
| 0 | 8911810832fffff | [0.09279481426078284, 0.12189811352115021, 0.2... |
| 1 | 8911810836fffff | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... |
| 2 | 89118108803ffff | [0.09279481426078284, -0.21143521981218316, 0.... |
| 3 | 8911810880bffff | [0.09279481426078284, 0.7885647801878168, 0.26... |
| 4 | 8911810880fffff | [-0.24053851907255047, 0.4552314468544835, -0.... |
| ... | ... | ... |
| 1568 | 891181735b7ffff | [0.09279481426078284, 0.7885647801878168, 0.26... |
| 1569 | 891181735bbffff | [0.007428960602246251, 0.45319892652928023, 0.... |
| 1570 | 891181735c7ffff | [-0.024852244562746567, 0.4356236037172286, 0.... |
| 1571 | 891181735d3ffff | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... |
| 1572 | 891181735d7ffff | [0.05575777722374581, 0.3441203357433724, 0.11... |
1573 rows × 2 columns
from sklearn.manifold import TSNE
import plotly.express as px
from sklearn.impute import SimpleImputer
# Stack the per-cell embeddings into one 2-D matrix (one row per cell).
features = np.stack(hex_df["hex_emb"].to_list())

# Replace NaN entries with the mean of their column so the matrix can be
# fed to t-SNE and k-means.
imp = SimpleImputer(strategy='mean', missing_values=np.nan)
features = imp.fit_transform(features)

# Project to two dimensions and show the raw scatter.
tsne = TSNE(random_state=42, n_components=2)
projections = tsne.fit_transform(features)
fig = px.scatter(projections, x=0, y=1)
fig.show("notebook")
import matplotlib.pyplot as plt
# from kneed import KneeLocator
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
# Fit a 3-cluster k-means on the imputed embedding matrix.
kmeans_settings = dict(
    n_clusters=3,
    init="random",
    n_init=10,
    max_iter=300,
    random_state=42,
)
kmeans = KMeans(**kmeans_settings)
kmeans.fit(features)
KMeans(init='random', n_clusters=3, n_init=10, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KMeans(init='random', n_clusters=3, n_init=10, random_state=42)
from sklearn.manifold import TSNE
import seaborn as sns
sns.color_palette("rocket")
from sklearn.cluster import KMeans
n_clusters = 4

# random_state makes the cluster labels reproducible between runs (they are
# later drawn on the map); n_init is pinned to 10, the value that was in
# effect implicitly, which also silences scikit-learn's FutureWarning about
# the default changing.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
skillsPCA_labels = kmeans.fit_predict(features)

# 2-D t-SNE layout of the same features, coloured by cluster label.
Xtsne = TSNE(n_components=2, random_state=42).fit_transform(features)
dftsne = pd.DataFrame(Xtsne, columns=['x1', 'x2'])
dftsne['cluster'] = skillsPCA_labels

sns.set(rc={'figure.figsize': (11.7, 8.27)})
sns.scatterplot(data=dftsne, x='x1', y='x2', hue='cluster', legend="full", alpha=0.5)
/home/evg/.config/JetBrains/DataSpell2022.2/projects/workspace/lib/python3.10/site-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
<AxesSubplot: xlabel='x1', ylabel='x2'>
# Copy the cluster label of every cell onto hex_df (aligned on the row index).
cluster_labels = dftsne["cluster"]
hex_df["cluster"] = cluster_labels
hex_df
| h3_geohash | hex_emb | cluster | |
|---|---|---|---|
| 0 | 8911810832fffff | [0.09279481426078284, 0.12189811352115021, 0.2... | 1 |
| 1 | 8911810836fffff | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | 1 |
| 2 | 89118108803ffff | [0.09279481426078284, -0.21143521981218316, 0.... | 1 |
| 3 | 8911810880bffff | [0.09279481426078284, 0.7885647801878168, 0.26... | 3 |
| 4 | 8911810880fffff | [-0.24053851907255047, 0.4552314468544835, -0.... | 1 |
| ... | ... | ... | ... |
| 1568 | 891181735b7ffff | [0.09279481426078284, 0.7885647801878168, 0.26... | 1 |
| 1569 | 891181735bbffff | [0.007428960602246251, 0.45319892652928023, 0.... | 0 |
| 1570 | 891181735c7ffff | [-0.024852244562746567, 0.4356236037172286, 0.... | 1 |
| 1571 | 891181735d3ffff | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | 1 |
| 1572 | 891181735d7ffff | [0.05575777722374581, 0.3441203357433724, 0.11... | 0 |
1573 rows × 3 columns
# Choropleth of the k-means cluster assigned to each hexagon, centred on the
# mean coordinate of all location rows.
fig = px.choropleth_mapbox(
    hex_df,
    geojson=geojson_obj,
    locations='h3_geohash',
    color='cluster',
    color_continuous_scale="Viridis",
    range_color=(0, n_clusters - 1),
    mapbox_style='carto-positron',
    zoom=12,
    center={"lat": locs_df.lat.mean(), "lon": locs_df.lon.mean()},
    opacity=0.1,
    # The label must be keyed on the plotted column; the previous 'count' key
    # referred to a column this figure does not use, so it never applied.
    labels={'cluster': 'interest cluster'})
fig.update_layout(margin={"r": 0, "t": 1, "l": 2, "b": 3})
fig.show("notebook")